library("CostTrajectory")
In this document, I provide a simulation example for the CostTrajectory package and with beautiful plot examples for the paper. For more details on the methods behind, please check on:
citation("CostTrajectory")
This example takes the documentation from CostTrajectory() and shows the results when censored data is included in the estimation procedure. We simulate data and then fit them:
data <- CostTrajectory_simulate_data(n=50)
head(data)
## id surv death time status Y
## 1 1 9 112 1 0 13.073536
## 2 1 9 112 2 0 11.285352
## 3 1 9 112 3 0 11.000561
## 4 1 9 112 4 0 9.900468
## 5 1 9 112 5 0 8.961636
## 6 1 9 112 6 0 8.993106
fit <- CostTrajectory(time=data$time, surv=data$surv, cost=data$Y, id=data$id, status=data$status)
## Warning in fit_cost(B, theta.old, z, id, varfunc, usecensor, idx.sts, idx.lts, :
## parameter estimates did NOT converge in 20 iterations. Increase MAX.IT in
## control.
## Warning in fit_cost(B, theta.old, z, id, varfunc, usecensor, idx.sts, idx.lts, :
## parameter estimates did NOT converge in 20 iterations. Increase MAX.IT in
## control.
Then we check the plots and summary by looking at
plot(fit)
summary(fit)
##
## Number of Observations : 50
## of which maximum follow-up time tau: 100
## censored prior to tau: 7 ( 70 % )
## long-term survivors (LTS): 35 ( 14 % )
## (Selected) smoothing parameters
## over time-axis: 1e-05
## over surv-axis: 1e-05
## over LTS -axis: 1e-05
## (Estimated) correlation parameter (alpha):
## over uncensored pateints: 0.297
## over LTS: 0.0931
##
## Residuals:
## Min 1Q Median 3Q Maobject
## -5.3359 -1.0032 -0.2269 0.4880 6.2634
##
## Settings and control:
## each axis: 7 - differences of order 2
## convergence tolerance : 1e-04
## generalized cross validation (GCV) : 1351.6
Population mean results are also of interest for descripive analysis. We use a function CostTrajectory_PopulaionMean() to produce pointwise average estimates based solely on uncensored data.
fit.popmean <- CostTrajectory_PopulationMean(data)
tau <- max(data$surv)
Z0 <- matrix(NA,tau+2,tau)
for(i in 1:tau){
for(j in 1:i){
Z0[i,j] <- fit.popmean$Y[fit.popmean$death == i & fit.popmean$time == j]
}
}
for(j in 1:tau)
Z0[tau+2,j] <- fit.popmean$Y[fit.popmean$death == tau+1 & fit.popmean$time == j]
par(mfrow = c(1,2))
image2D(x=1:tau,y=1:(tau+2), t(Z0), main = 'Population mean heatmap',
xlab = 'time',ylab = 'survival')
persp3D(y=1:tau,x=1:(tau+2), z = Z0, main = 'Population mean surface',
ylab = 'time',xlab = 'survival', zlab='Cost',
clab = "Cost",theta=30,phi=25)
These plots are not smooth and curvy, because our sample size is small (n=50), and in some survival groups there is not data. Therefore CostTrajectory() is necessary to improve the interpretability of this data.
Thought it would be more efficient and elegant to is provide interactive plots, here we show how to do it. First we load packages and preprocess the results.
library(ggplot2)
library(wesanderson)
library(scales)
library(gridExtra)
library(plotly)
library(rayshader)
pal <- wes_palette("Zissou1", 10, type = "continuous")
ndx <- 5; deg <- 2
tau <- fit$tau
x <- 1:tau; y <- 1:tau
Bi <- fit$test$B
theta1 <- matrix(fit$coefficent$theta,ncol=1)
theta2 <- matrix(fit$coefficent.init$theta,ncol=1)
dat.test <- fit$test$data
s.lst <- c(c(2,4,6,8)*10,101)
vv1 = fit$coefficent$variance$variance
vv2 = fit$coefficent.init$variance$variance
Bi1 <- cbind(Bi, matrix(0,nrow(Bi),ndx+deg))
se1 = sqrt(diag(Bi1 %*% vv1 %*% t(Bi1)))
se2 = sqrt(diag(Bi1 %*% vv2 %*% t(Bi1)))
yhat1 <- as.vector(Bi %*% theta1)
yhat2 <- as.vector(Bi %*% theta2)
pal <- wes_palette("Zissou1", 10, type = "continuous")
dat.test$Y1 <- as.array(yhat1)
dat.test$Y2 <- as.array(yhat2)
idx <- which(dat.test$death %in% s.lst)
pt1d <- data.frame(dat.test, group = paste0('Fitted', dat.test$id),
Stage = rep('Fitted',nrow(dat.test)),
YY=yhat1,Ylb = yhat1-1.96*se1,Yub = yhat1+1.96*se1)
pt2d <- data.frame(dat.test, group = paste0('Initial', dat.test$id),
Stage = rep('Initial',nrow(dat.test)),
YY=yhat2,Ylb = yhat2-1.96*se1,Yub = yhat1+1.96*se2)
We plot the fitted curves on selected survival times, and compared two groups (Fitted vs Initial). Fitted'' uses censored data whileInitial’’ does not.
ptd <- rbind(pt1d[idx,],pt2d[idx,])
ptd$id[ptd$id==101] <- '100+'
ptd$id <- factor(ptd$id,levels=c("20","40","60","80","100+" ))
ptd$Method <- 'Estimate'
xx <- ptd[,c('death','time','YY','Stage','Ylb','Yub','id')]
p=ggplot(xx)+geom_line(aes(time,YY,group=Stage,color=Stage,linetype=Stage))+
geom_ribbon(data=subset(xx, 0 <= xx$time & xx$time <= tau),
aes(x=time,xmin=0,xmax=tau,ymin=Ylb,ymax=Yub,
group=Stage,fill=Stage), alpha=0.3) +
scale_fill_manual(values = c("blue", "red", 'green')) +ylim(0,15)+
xlab('Time')+ylab('Cost')+
scale_x_continuous(breaks = seq(0,tau, by = 10)) +
scale_color_manual(values = c("blue", "red"))+
theme(panel.background = element_rect(fill='white',color='black'),
panel.grid.major = element_line(colour = "lightgrey",size=.2),
legend.position = "none",
legend.box = "vertical",
plot.title = element_text(hjust = 0.5)) +
facet_grid(rows = vars(id))
ggplotly(p)
The blue and red curves are close in each survival group, suggesting our method handles the censored cost data well.
Next we shows the heatmap and 3D surface.
ptd <- rbind(pt1d,pt2d)
ptd$id <- as.factor(ptd$id)
yy <- ptd[ptd$death>tau,]
ptd <- ptd[ptd$death<=tau,]
yy$death <- tau+5
yy2 <- yy
for(i in (tau+6:15)){
yy1 <- yy; yy1$death <- i
yy2 <- rbind(yy2,yy1)
}
ptd1 <- rbind(ptd, yy2)
p <- ggplot(ptd1,aes(y=death,x=time))+
geom_raster(aes(fill=YY)) +
geom_contour(aes(z=Y),color='black',breaks=c(3,6,9))+
xlab('Time')+ylab('Survival ')+
scale_fill_gradientn(colours = pal, limits=c(0,18),name='Cost\n',
values = rescale(c(0,8,10,18),
from = c(0,18)))+
theme(panel.background = element_rect(fill='white',color='black'),
plot.title = element_text(hjust = 0.5), legend.key.height = unit(1, "cm")
)+
scale_x_continuous(breaks = seq(0,tau, by = 10)) +
scale_y_continuous(breaks = seq(0,tau, by = 10)) +
facet_grid(cols = vars(Stage))
ggplotly(p)
## Warning: Raster pixels are placed at uneven vertical intervals and will be
## shifted. Consider using geom_tile() instead.
y = 1:(tau+15); x=1:tau
z <- matrix(NA, tau,tau+15)
for(i in 1:tau) z[(1:i),i] <- ptd1$YY[ptd1$group == paste0('Fitted',i)]#Local/Regional
for(i in (6:15)+tau) z[,i] <- ptd1$YY[(ptd1$group == paste0('Fitted',tau+1)) & (ptd1$death==i)]
p <- plot_ly(x=y, y=x, z=z,colors = colorRamp(pal,bias=2,alpha=0.7),
type = 'surface',
contours = list(
x = list(show = TRUE, start = 20, end = tau, size = 20, color = 'white'),
y = list(show = TRUE, start = 20, end = tau, size = 20, color = 'white'),
z = list(show = TRUE, start = 3, end = 15, size = 1, color = 'white'))) %>% layout(
scene = list(
camera=list(
eye = list(x=2, y=1, z=.5)
)
)
) %>% layout(scene = list(yaxis = list(title='Time',
tickvals = seq(20,tau,20)),
xaxis = list(title='Survrival',
tickvals = seq(20,tau,20)),
zaxis = list(title='Cost',
tickvals = seq(0,15,5))))
p